# Dickstein, Ho, and Mark (2023)
# This script estimating small group consumer surplus.

# * # * # * # * # * # * #
# PRELIMINARIES         #
# * # * # * # * # * # * #

setwd("../../../../library")
source("PreliminariesCode.R")
library(AER)
library(mlogit)

# Which rating areas?
ravec = 1:7

# Which years?
yrvec = 2015:2016

# What percent of the premium is covered by an employer?
nu = .65

# What alpha-psi cutoff to use: 
alphampsicutoff <- .05

# Where is the counterfactual folder? 
counterfactual_folder <- paste0(project_folder, "/analysis/counterfactuals")

# * # * # * # * # * # * #
# LOADING DATA          #
# * # * # * # * # * # * #

# Calling the modify exploded data function
source(paste0(project_folder, "/library/DA01QuanModifyExplodedDFunc.R"))

# Loading household-level dataset
SubDat_List<- list()
for (i in 5:7){
  SubDat_List[[i]] <- fread(paste0(project_folder, "/data/orig/SubData", Shortyear.Vec[i], "7.csv"))
}
SubDat <- do.call("rbind", SubDat_List)

# Loading small group household-option level dataset
explodeddata_sg_pre <- fread(paste0(project_folder, "/data/explodeddata_sg_pre.csv"))
explodeddata_sg_pre <- explodeddata_sg_pre[ year %in% yrvec]

## Demand Parameters ##

# Loading estimated parameters for switchers:
SGMarketEstimates <- fread(paste0(project_folder, "/analysis/estimationcode/specs/lowrisk_simplemh_censor0.2_switcher/output/solution.csv"))
# Kondo.
SGMarketEstimates <- SGMarketEstimates[, -c(1,2)]

# Loading the number of beta1, 2, 3, 0 parameters:
SGMarketlabels <- fread(paste0(project_folder, "/analysis/estimationcode/specs/lowrisk_simplemh_censor0.2_switcher/output/covar_lens.csv"))
SGMarSE <- cumsum(SGMarketlabels$x)

# * CREATING FINAL DATASET * #
acgquinsvec <- as.numeric(scan(file=paste0(project_folder, "/data/orig/acg_positive_quintiles"), sep = ",", what = "character")[2:7])
mod_exploded_data <- mod_exploded_data(
  explodeddata_sg_pre, 
  paste0(project_folder, "/data"),
  highmeanthresh = 0.59616,
  lowsumthresh = 0.2575,
  highsumthresh = 1.76329,
  highmaxthresh = 3.37157,
  medincome = 2.746942,
  acgquins = acgquinsvec,
  winsorbound = 89.3707340916662,
  ravec = ravec,
  yrvec = yrvec)

# Kondo.
rm(explodeddata_sg_pre)

# Adjusting to reflect SG institutional details
mod_exploded_data$silver_sub <- 0
mod_exploded_data$silver_costshare <- 0
mod_exploded_data$x_av <- ifelse(
  mod_exploded_data$x_av %in% c(0.7, 0.73, 0.87, 0.94), 
  0.7, 
  mod_exploded_data$x_av)
  
# Adding variables used to create "tiered prices"
RatioDat <- SubDat[, .(
  RatioSum,
  contractnum, 
  famtype_score = fcase(
    nspouse == 0 & ndeps == 0, 1,
    nspouse == 0 & ndeps > 0, 1.85,
    nspouse > 0 & ndeps == 0, 2,
    nspouse > 0 & ndeps > 0, 2.85),
  subscriberid, 
  year), 
]
mod_exploded_data <- merge(
  mod_exploded_data, 
  RatioDat, 
  by = c("subscriberid", "year"), 
  all.x = T)
  
# * # * # * # * # * # * # * # * #
# DERIVING V ESTIMATES          #
# * # * # * # * # * # * # * # * #

#Subsetting
ExplodedData_SG <- mod_exploded_data
ExplodedData_SG <- ExplodedData_SG[best_guess_ra %in% ravec]
ExplodedData_SG <- ExplodedData_SG[year %in% yrvec]

# Setting SG market demand parameter values
SGNamedBetaList <- list(
  beta1 = as.numeric(SGMarketEstimates[1, 1:SGMarSE[1]]),
  beta2 = as.numeric(SGMarketEstimates[1, (1 + SGMarSE[1]):SGMarSE[2]]),
  beta3 = as.numeric(SGMarketEstimates[1, (1 + SGMarSE[2]):SGMarSE[3]]),
  beta0 = as.numeric(SGMarketEstimates[1, (1 + SGMarSE[3]):SGMarSE[4]])
)

# Setting SG market demand parameter names
names(SGNamedBetaList[[1]]) <- names(SGMarketEstimates)[1:SGMarSE[1]]
names(SGNamedBetaList[[2]]) <- names(SGMarketEstimates)[(1 + SGMarSE[1]):SGMarSE[2]]
names(SGNamedBetaList[[3]]) <- names(SGMarketEstimates)[(1 + SGMarSE[2]):SGMarSE[3]]
names(SGNamedBetaList[[4]]) <- names(SGMarketEstimates)[(1 + SGMarSE[3]):SGMarSE[4]]

# Construct alpha, omega, psi, fixed effect term in the household-option level dataset:
ExplodedData_SG$alpha <- exp(as.matrix(ExplodedData_SG[, names(SGNamedBetaList$beta1), with = F]) %*% SGNamedBetaList$beta1)
ExplodedData_SG$omega <- exp(as.matrix(ExplodedData_SG[, names(SGNamedBetaList$beta2), with = F]) %*% SGNamedBetaList$beta2)
ExplodedData_SG$psi <- exp(as.matrix(ExplodedData_SG[, names(SGNamedBetaList$beta3), with = F]) %*% SGNamedBetaList$beta3)
ExplodedData_SG$fixedeffect_beta0 <- as.matrix(ExplodedData_SG[, names(SGNamedBetaList$beta0), with = F]) %*% SGNamedBetaList$beta0

# Generating household level prices: 
## no tiering
ExplodedData_SG[, price_hh_notiering := (grossprem / 100) * (1 - nu) * (1 - (best_guess_sumrate / 100)), ]
## tiering
ExplodedData_SG[, totalprem_contract := sum(grossprem), by = c("constructed_plan_year", "contractnum", "year")]
ExplodedData_SG[, totalfamtypescore_contract := sum(famtype_score), by = c("constructed_plan_year", "contractnum", "year")]
ExplodedData_SG[, grossprem_tiered := famtype_score * totalprem_contract / totalfamtypescore_contract, ]
ExplodedData_SG[, price_hh := (grossprem_tiered / 100) * (1 - nu) * (1 - (best_guess_sumrate / 100)), ]

# Generating V: 
ExplodedData_SG[, V_notiering := x_av + .5 * omega * (x_av ^ 2) - (alpha - psi) * price_hh_notiering + fixedeffect_beta0, ]
ExplodedData_SG[, V := x_av + .5 * omega * (x_av ^ 2) - (alpha - psi) * price_hh + fixedeffect_beta0, ]

# * # * # * # * # * # * # * # * # * # * # * # * #
# CREATING CONTRACT LEVEL CHARACTERISTICS       #
# * # * # * # * # * # * # * # * # * # * # * # * #
SubDat[, group_size := .N, by = c("contractnum", "year")]

# * # * # * # * # * # * # * # * # * # * # * # * #
# SAVING DATA FOR THIS ESTIMATION PROCESS       #
# * # * # * # * # * # * # * # * # * # * # * # * #
ExplodedData_SG <- merge(ExplodedData_SG, 
  SubDat, 
  by.x = c("subscriberid", "year"), 
  by.y = c("subscriberid", "year"), 
  all.x = T, 
  suffixes = c("", ".subdat"))

ExplodedData_SG[, choice := as.numeric(chosen_plan_id == constructed_plan_year), ]
nchoice <- ExplodedData_SG[, (nchoice = .N), by = c("subscriberid", "year")]

# Saving
ExplodedData_SG[, bronze := as.numeric(x_av == .6), ]
ExplodedData_SG[, silver := as.numeric(x_av == .7 | x_av == .73 | x_av == .87), ]
ExplodedData_SG[, gold := as.numeric(x_av == .8), ]
ExplodedData_SG[, platinum := as.numeric(x_av == .9), ]
varstosave <- c("subscriberid", "year", "constructed_plan_year", "choice", 
  "x_av", "group_size", "sum_concurrent_risk", "max_concurrent_risk", 
  "age", "family_size", "familytype", "nspouse", "ndeps", 
  "bronze", "silver", "gold", "platinum", "price_hh_notiering", "grossprem", "grossprem_tiered", "price_hh",
  "alpha", "psi", "omega", "fixedeffect_beta0", "V", "V_notiering")
TosaveDat <- ExplodedData_SG[, varstosave, with = F]
TosaveDat[, hhid := .GRP, by = c("subscriberid", "year")]

dir.create(paste0(counterfactual_folder, "/specs/lowrisk_simplemh_censor0.2/data"), recursive = T)
fwrite(TosaveDat, 
  paste0(counterfactual_folder, "/specs/lowrisk_simplemh_censor0.2/data/SGSurplusEstimationFile.csv"))
